#################################################################
# "Democracy, Inequality and Antitrust"                         #
#   By Michael O Allen, Kenneth Scheve, and David Stasavage     #
#                                                               #
#   Last modified: 12/21/2024                                   #
#################################################################

library(data.table)
library(ggplot2)
library(cowplot)
library(PanelMatch)
library(fixest)
library(marginaleffects)
library(interflex)
library(xtable)
library(modelsummary)

library(here)


# Function ----------------------------------------------------------------
getGroups <- Vectorize(function (x) x$fixef_sizes[['code_cowc']])

# Adding commas to things
f <- function (x) format(x, big.mark = ',')


# Table 1 -----------------------------------------------------------------

## Data ----
df_t1 <- haven::read_dta(here('data', 'cli_p3.dta'))

### Analyses ------------------------------------------------------------

t1_1 <- feols(p3_CLI_DV ~  dem_lag1 | code_cowc + p3,
            data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

t1_2 <- feols(p3_CLI_DV ~  dem_lag1 * top1Share_interp_lag1_ln | 
              code_cowc + p3,
            data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

t1_3 <- feols(p3_CLI_DV ~  dem_lag1 * top1Share_interp_lag1_ln +
              gdp_lag1_ln +
              gdppc_lag1_ln | 
              code_cowc + p3,
            data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

t1_4 <- feols(p3_CLI_DV ~  dem_lag1 * top1Share_interp_lag1_ln +
              gdp_lag1_ln +
              gdppc_lag1_ln +
              trade_openness_lag1 | 
              code_cowc + p3,
            data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

t1_5 <- feols(p3_CLI_DV ~  dem_lag1 * top1Share_interp_lag1_ln +
              gdp_lag1_ln +
              gdppc_lag1_ln +
              trade_openness_lag1 +
              crisis_3_percent_lag1 | 
              code_cowc + p3,
            data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])


### Gen table -------------------------------------------------------------

coefNames <- c("dem_lag1" = "Democracy",
               "top1Share_interp_lag1_ln" = "ln Top 1% Income Share",
               "dem_lag1:top1Share_interp_lag1_ln" = "ln Top 1% Income Share x Democracy",
               "gdp_lag1_ln" = "ln GDP",
               "gdppc_lag1_ln" = "ln GDP per cap.",
               "trade_openness_lag1" = "Trade Openness",
               "crisis_3_percent_lag1" = "Economic Crisis"
)

models <- list(t1_1, t1_2, t1_3, t1_4, t1_5)
names(models) <- paste0("(", 1:length(models), ")")

new_rows <- data.frame(matrix(c("Period Length", rep("3 years", 5),
                                "Country & Period FE?", rep("Yes", 5),
                                "Countries", getGroups(models)),
                              ncol = 6, byrow = TRUE))

attr(new_rows, 'position') <- c((length(coefNames) * 2) + 1:3)

msummary(models,
         coef_map = coefNames,
         stars = c('*' = .1, '**' = .05, '***' = .01),
         add_rows = new_rows,
         gof_map = list(list('raw' = 'nobs', 'clean' = 'Observations', 'fmt' = f),
                        list('raw' = 'adj.r.squared', 'clean' = 'Adj. R$^2$', 'fmt' = '%.3f')),
         notes = "Standard errors clustered by country",
         output = here('output', 'tables', 'table-1.tex')
)


## Scaled estimate ----------------------------------------------------------------------

m_scaled_dv <- feols(scale(p3_CLI_DV) ~  dem_lag1 * top1Share_interp_lag1_ln +
                       gdp_lag1_ln +
                       gdppc_lag1_ln +
                       trade_openness_lag1 +
                       crisis_3_percent_lag1 | 
                       code_cowc + p3,
                     data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

# Log-point increase in inequality reduces association 
#   between democracy and CLI by .674 SDs
m_scaled_dv$coefficients['dem_lag1:top1Share_interp_lag1_ln']

# Figure 2 ----------------------------------------------------------------

# Uses estimates from Table 1 above
cme_data <- plot_cme(t1_5, 
                     variables = "dem_lag1", 
                     condition = "top1Share_interp_lag1_ln",
                     draw = FALSE)
ggplot(cme_data,
       aes(x = top1Share_interp_lag1_ln, y = estimate)) +
  geom_hline(yintercept = 0, color = 'red', size = .5, linetype = 'dashed') +
  geom_line() +
  geom_ribbon(aes(ymin = conf.low, 
                  ymax = conf.high), alpha = .2) +
  labs(x = 'ln Top 1% Income Share',
       y = 'Marginal Effect of Democracy on CLI') +
  scale_x_continuous(limits = c(1.5, 3.5)) +
  scale_y_continuous(limits = c(-.25,.5),
                     breaks = seq(-.2, .5, .1)) +
  theme_minimal(base_size = 14) +
  theme(legend.position = 'none',
        panel.grid.minor.x = element_blank(),
        panel.grid.minor.y = element_blank())

ggsave(here('output', 'figures/figure-2.png'),
       width = 6, height = 4, units = 'in', dpi = 450)


# Figure 3 ----------------------------------------------------------------

# Load the function to plot
source(here('scripts', 'plot-function.R'))

## Data -------------------------------------------------------------------

df_f3 <- fread(here('data', 'cli_yearly.csv'))

df_f3[ , cid := as.integer(factor(code_cowc))]
df_f3[ , gdp_ln := log(gdp)]
df_f3[ , gdppc_ln := log(gdppc)]

vars <- c('cli_overall_norm',
          'gdp_ln',
          'gdppc_ln',
          'trade_openness',
          'top1Share_interp_ln')

df_f3[ , paste0(vars, '_lm3') := (frollmean(.SD, 3, align = 'right',
                                               na.rm = T, hasNA = T)),
          .SDcols = vars,
          by = code_cowc]

## Create subsets ----------------------------------------------------------
# Low inequality transitions only
# Drop transitions + decade that occurred with above median levels of inequality or unknown

df_f3$lowInequality_subset_drop <- 1

for (r in which((df_f3$top1Share_interp >= median(df_f3$top1Share_interp, na.rm = T) |
                 is.na(df_f3$top1Share_interp)) 
                & df_f3$dem_trans == 1)) {
  df_f3[ r:(r+10), lowInequality_subset_drop := 0]
}


# High inequality subset
# Drop transitions that occurred with below median levels of inequality or unknown

df_f3$highInequality_subset_drop <- 1

for (r in which((df_f3$top1Share_interp < median(df_f3$top1Share_interp, na.rm = T) |
                 is.na(df_f3$top1Share_interp)) 
                & df_f3$dem_trans == 1)) {
  df_f3[ r:(r+10), highInequality_subset_drop := 0]
}

# Analysis -----------------------------------------------------------------

## Low inequality -------------------------------------------------------------------

data_polity_low <- as.data.frame(df_f3[lowInequality_subset_drop == 1 & year >= 1945])

pm_lowInequality <- PanelMatch(lag = 5, lead = 0:10, 
                               time.id = 'year', unit.id = 'cid', 
                               treatment = 'dem_bin', 
                               outcome.var = 'cli_overall_norm',
                               refinement.method = 'CBPS.weight',
                               covs.formula = ~ 
                                 lag(cli_overall_norm_lm3, 1) +
                                 lag(gdp_ln_lm3, 1) +
                                 lag(gdppc_ln_lm3, 1) +
                                 lag(trade_openness_lm3, 1) +
                                 lag(top1Share_interp_ln_lm3, 1),
                               qoi = 'att', 
                               match.missing = FALSE,
                               forbid.treatment.reversal = FALSE,
                               data = data_polity_low)

set.seed(1)
pe_low <- PanelEstimate(pm_lowInequality, number.iterations = 2000,
                        data = data_polity_low)
low_est <- round(summary(pe_low)$summary[, 1:2] ,3)

## High inequality ------------------------------------------------------------------

data_polity_high <- as.data.frame(df_f3[highInequality_subset_drop == 1 & year >= 1945])

pm_highInequality <- PanelMatch(lag = 5, lead = 0:10, 
                                time.id = 'year', unit.id = 'cid', 
                                treatment = 'dem_bin', 
                                outcome.var = 'cli_overall_norm',
                                refinement.method = 'CBPS.weight',
                                covs.formula = ~ 
                                  lag(cli_overall_norm_lm3, 1) +
                                  lag(gdp_ln_lm3, 1) +
                                  lag(gdppc_ln_lm3, 1) +
                                  lag(trade_openness_lm3, 1) +
                                  lag(top1Share_interp_ln_lm3, 1),
                                qoi = 'att', 
                                match.missing = FALSE, 
                                forbid.treatment.reversal = FALSE,
                                data = data_polity_high)

# Used to make Table C.1
set.seed(1)
pe_high <- PanelEstimate(pm_highInequality, number.iterations = 2000,
                         data = data_polity_high)
high_est <- round(summary(pe_high)$summary[, 1:2] ,3)

## Create plots ------------------------------------------------------------

nicePMplot(pm_lowInequality, data_polity_low,
           yupper = .4, ylower = -0.21,
           seed = 1)

ggsave(here('output', 'figures', 'figure-3a.png'),
       width = 6, height = 4, units = 'in', dpi = 450)

nicePMplot(pm_highInequality, data_polity_high,
           yupper = .4, ylower = -0.21,
           seed = 1)

ggsave(here('output', 'figures', 'figure-3b.png'),
       width = 6, height = 4, units = 'in', dpi = 450)

# Table C1 ----------------------------------------------------------------

# This formats the estimates and SEs from PanelMatch into a table.

low_est <- t(matrix(c(low_est[ , 1], paste0( "(", low_est[, 2], ")")),
       ncol = 2))
low_est <- matrix(low_est, byrow = TRUE)

high_est <- t(matrix(c(high_est[ , 1], paste0( "(", high_est[, 2], ")")),
                    ncol = 2))
high_est <- matrix(high_est, byrow = TRUE)

t <- rep("", 22)
t[seq(1, 21, 2)] <- 0:10

tc1 <- xtable(cbind(t, low_est, high_est))

print(tc1,
      include.rownames = FALSE,
      file = here('output', 'tables', 'table-c1.tex'))

# Table A1 ----------------------------------------------------------------

## Data ----
df_t1 <- haven::read_dta(here('data', 'cli_p3.dta'))


## Analyses -------------------------------------------------------------


ta1_1 <- feols(p3_CLI_DV ~  dem_lag1 * top1Share_lag1_ln | 
              code_cowc + p3,
            data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

ta1_2 <- feols(p3_CLI_DV ~  dem_lag1 * top1Share_lag1_ln +
              gdp_lag1_ln +
              gdppc_lag1_ln | 
              code_cowc + p3,
            data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

ta1_3 <- feols(p3_CLI_DV ~  dem_lag1 * top1Share_lag1_ln +
              gdp_lag1_ln +
              gdppc_lag1_ln +
              trade_openness_lag1 | 
              code_cowc + p3,
            data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

ta1_4 <- feols(p3_CLI_DV ~  dem_lag1 * top1Share_lag1_ln +
                 gdp_lag1_ln +
                 gdppc_lag1_ln +
                 trade_openness_lag1 +
                 crisis_lag1  
                 | code_cowc + p3,
               data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

ta1_5 <- feols(p3_CLI_DV ~  dem_lag1 * top1Share_lag1_ln +
              gdp_lag1_ln +
              gdppc_lag1_ln +
              trade_openness_lag1 +
              crisis_3_percent_lag1 | 
              code_cowc + p3,
            data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])


## Generate Table A1 ------------------------------------------------------

coefNames <- c("top1Share_lag1_ln" = "ln Top 1\\% Income Share\\textsubscript{Observed}",
               "dem_lag1" = "Democracy",
               "dem_lag1:top1Share_lag1_ln" = "ln Top 1\\% Income Share\\textsubscript{Observed} $\\times$ Democracy",
               "gdp_lag1_ln" = "ln GDP",
               "gdppc_lag1_ln" = "ln GDP per cap.",
               "trade_openness_lag1" = "Trade Openness",
               "crisis_lag1" = "Economic Crisis\\textsubscript{RR}",
               "crisis_3_percent_lag1" = "Economic Crisis\\textsubscript{GDP -3\\%}"
)

models <- list("(1)" = ta1_1, 
               "(2)" = ta1_2, 
               "(3)" = ta1_3, 
               "(4)" = ta1_4,
               "(5)" = ta1_5)

new_rows <- data.frame(matrix(c("Period Length", rep("3 years", length(models)),
                                "Country \\& Period FE?", rep("Yes", length(models)),
                                "Countries", getGroups(models)),
                              ncol = 6, byrow = TRUE))

attr(new_rows, 'position') <- c((length(coefNames) * 2) + 1:3)

msummary(models,
         coef_map = coefNames,
         stars = c('*' = .1, '**' = .05, '***' = .01),
         add_rows = new_rows,
         gof_map = list(list('raw' = 'nobs', 'clean' = 'Observations', 'fmt' = f),
                        list('raw' = 'adj.r.squared', 'clean' = 'Adj. R$^2$', 'fmt' = '%.3f')),
         escape = FALSE,
         title = "Re-run of Table \\ref{tab:crossnat_main} with no interpolated inequality data",
         notes = "Standard errors clustered by country. Independent variables lagged by 1 time period.",
         output = here('output', 'tables', 'table-a1.tex')
)


# Table A2 ----------------------------------------------------------------

ta2 <- df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ]
ta2 <- ta2[unlist(t1_5$obs_selection), ]
setDT(ta2)

ta2 <- ta2[ , .(obs = .N,
                  firstPeriod = p3[1]) , by = country]

periods <- names(attributes(df_t1$p3)$labels)

ta2$firstYear <- substring(periods[ta2$firstPeriod], 2,5)

ta2 <- xtable(ta2[ , c('country', 'obs', 'firstYear')])
print(ta2,
      file = here('output', 'tables', 'table-a2.tex'))

rm(ta2, periods)

# Figure B1 ---------------------------------------------------------------

## Function ---------------------------------------------------------------

# This function collapses the yearly data into different time periods
#   and estimates the models for Figure B1.

altDuration_CME <- function(df) {
  results <- data.table()
  
  for (duration in 1:8) {
    
    df_temp <- df
    
    if (duration != 7) {
      df_temp$period <- cut(df_temp$year, seq(1890, 2010, by=duration),
                            right=T)  
    } else  { # To ensure we include 2010 in the analysis
      df_temp$period <- cut(df_temp$year, seq(1891, 2010, by=duration),
                            right=T)
    }
    
    df_temp <- df_temp[!is.na(period)]
    
    setorder(df_temp, code_cowc, year)
    
    df_temp[ , DV := cli_overall_norm[1] , by = .(code_cowc, period)]
    
    df_temp <- df_temp[, .(DV = unique(DV),
                           top1Share_interp = mean(top1Share_interp, na.rm = TRUE),
                           dem = mean(dem_bin, na.rm = TRUE),
                           gdppc = mean(gdppc, na.rm = TRUE),
                           gdp = mean(gdp, na.rm = TRUE),
                           crisis_3_percent = mean(crisis_3_percent, na.rm = TRUE),
                           trade_openness = mean(trade_openness, na.rm = TRUE),
                           firstYear = min(year),
                           dem_trans = sum(dem_trans, na.rm = TRUE),
                           top1_n = unique(top1_n)),
                       by = .(code_cowc, period)]
    
    setorder(df_temp, code_cowc, period)
    
    df_temp[ , c('top1Share_lag1',
                 'dem_lag1',
                 'gdppc_lag1',
                 'gdp_lag1',
                 'crisis_3_percent_lag1',
                 'trade_openness_lag1',
                 'dem_trans_lag1') := shift(.SD, 1, NA, 'lag'),
             .SDcols = c('top1Share_interp', 'dem', 'gdppc', 'gdp', 
                         'crisis_3_percent', 'trade_openness',
                         'dem_trans'),
             by = code_cowc]
    
    df_temp[ , gdp_lag1 := log(gdp_lag1)]
    df_temp[ , gdppc_lag1 := log(gdppc_lag1)]
    df_temp[ , top1Share_lag1 := log(top1Share_lag1*100)]
    df_temp[ , p_int := as.integer(period)]
    
    df_temp <- df_temp[top1_n >= 20]
    
    model <- feols(DV ~ top1Share_lag1 * dem_lag1 
                   + gdppc_lag1 
                   + gdp_lag1 
                   + trade_openness_lag1
                   + crisis_3_percent_lag1
                   | code_cowc + period,
                   data = df_temp)
    
    cme_data <- plot_cme(model, 
                         variables = "dem_lag1", 
                         condition = "top1Share_lag1",
                         draw = FALSE)
    
    
    cme_data$period_length <- duration
    results <- rbind(results, cme_data[, c("period_length", "estimate", 
                                           "conf.low", "conf.high",
                                           "top1Share_lag1")])
  }
  
  return(results)
}


## Load data --------------------------------------------------------------

df_yearly <- fread(here('data', 'cli_yearly.csv'))

## Generate Figure B1 -----------------------------------------------------

results <- altDuration_CME(df_yearly)

results$period_length <- factor(results$period_length, 
                                levels = 1:8,
                                labels = paste(1:8, 'year periods'))

ggplot(results,
       aes(x = top1Share_lag1, y = estimate)) +
  geom_line() +
  geom_hline(yintercept = 0, color = 'red', size = .5, linetype = 'dashed') +
  geom_ribbon(aes(ymin = conf.low, 
                  ymax = conf.high), alpha = .2) +
  labs(x = 'ln Top 1% Income Share',
       y = 'Conditional Marginal Effect of Democracy on CLI') +
  scale_x_continuous(limits = c(1.5, 3.5)) +
  theme_minimal(base_size = 14) +
  facet_wrap(~period_length) +
  theme(legend.position = 'none')

ggsave(here('output', 'figures', 'figure-b1.pdf'),
       width = 7, height = 7)

rm(altDuration_CME, results)


# Table B1 ----------------------------------------------------------------

df_yearly <- fread(here('data', 'cli_yearly.csv'))

df_yearly[ , gdp_ln := log(gdp)]
df_yearly[ , gdppc_ln := log(gdppc)]

tb1_1 <- feols(f(cli_overall_norm) ~ dem_bin * top1Share_interp_ln 
               | code_cowc + year,
               panel.id = ~code_cowc + year,
               df_yearly[ year >= 1985])

tb1_2 <- feols(f(cli_overall_norm) ~ dem_bin * top1Share_interp_ln 
               + gdp_ln
               + gdppc_ln
               | code_cowc + year,
               panel.id = ~code_cowc + year,
               df_yearly[ year >= 1985])

tb1_3 <- feols(f(cli_overall_norm) ~ dem_bin * top1Share_interp_ln 
               + gdp_ln
               + gdppc_ln
               + trade_openness
               | code_cowc + year,
               panel.id = ~code_cowc + year,
               df_yearly[ year >= 1985])

tb1_4 <- feols(f(cli_overall_norm) ~ dem_bin * top1Share_interp_ln 
               + gdp_ln
               + gdppc_ln
               + trade_openness
               + crisis_3_percent
               | code_cowc + year,
               panel.id = ~code_cowc + year,
               df_yearly[ year >= 1985])

coefNames <- c("dem_bin" = "Democracy",
               "top1Share_interp_ln" = "ln Top 1\\% Income Share",
               "dem_bin:top1Share_interp_ln" = "ln Top 1\\% Income Share $\\times$ Democracy",
               "gdp_ln" = "ln GDP",
               "gdppc_ln" = "ln GDP per cap.",
               "trade_openness" = "Trade Openness",
               "crisis_3_percentTRUE" = "Economic Crisis"
)

models <- list("(1)" = tb1_1, 
               "(2)" = tb1_2, 
               "(3)" = tb1_3, 
               "(4)" = tb1_4)

new_rows <- data.frame(matrix(c("Period Length", rep("1 year", length(models)),
                                "Country \\& Period FE?", rep("Yes", length(models)),
                                "Countries", getGroups(models)),
                              ncol = length(models) + 1, byrow = TRUE))

attr(new_rows, 'position') <- c((length(coefNames) * 2) + 1:3)

msummary(models,
         coef_map = coefNames,
         stars = c('*' = .1, '**' = .05, '***' = .01),
         add_rows = new_rows,
         gof_map = list(list('raw' = 'nobs', 'clean' = 'Observations', 'fmt' = f),
                        list('raw' = 'adj.r.squared', 'clean' = 'Adj. R$^2$', 'fmt' = '%.3f')),
         notes = "Standard errors clustered by country. Independent variables lagged by 1 time period.",
         escape = FALSE,
         output = here('output', 'tables', 'table-b1.tex')
)

# Table B2 ----------------------------------------------------------------

## Data ----
df_t1 <- haven::read_dta(here('data', 'cli_p3.dta'))


## Analyses ---------------------------------------------------------------

tb2_1 <- feols(p3_CLI_DV ~  dem_lag1 * top1Share_interp_lag1 | 
                 code_cowc + p3,
               data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

tb2_2 <- feols(p3_CLI_DV ~  dem_lag1 * top1Share_interp_lag1 +
                 gdp_lag1_ln +
                 gdppc_lag1_ln | 
                 code_cowc + p3,
               data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

tb2_3 <- feols(p3_CLI_DV ~  dem_lag1 * top1Share_interp_lag1 +
                 gdp_lag1_ln +
                 gdppc_lag1_ln +
                 trade_openness_lag1 | 
                 code_cowc + p3,
               data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

tb2_4 <- feols(p3_CLI_DV ~  dem_lag1 * top1Share_interp_lag1 +
                 gdp_lag1_ln +
                 gdppc_lag1_ln +
                 trade_openness_lag1 +
                 crisis_3_percent_lag1 | 
                 code_cowc + p3,
               data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])


## Gen. Table B2 ----------------------------------------------------------

coefNames <- c("dem_lag1" = "Democracy",
               "top1Share_interp_lag1" = "Top 1\\% Income Share",
               "dem_lag1:top1Share_interp_lag1" = "Top 1\\% Income Share $\\times$ Democracy",
               "gdp_lag1_ln" = "ln GDP",
               "gdppc_lag1_ln" = "ln GDP per cap.",
               "trade_openness_lag1" = "Trade Openness",
               "crisis_3_percent_lag1" = "Economic Crisis"
)

models <- list("(1)" = tb2_1, 
               "(2)" = tb2_2, 
               "(3)" = tb2_3, 
               "(4)" = tb2_4)

new_rows <- data.frame(matrix(c("Period Length", rep("3 years", length(models)),
                                "Country \\& Period FE?", rep("Yes", length(models)),
                                "Countries", getGroups(models)),
                              ncol = length(models) + 1, byrow = TRUE))

attr(new_rows, 'position') <- c((length(coefNames) * 2) + 1:3)

msummary(models,
         coef_map = coefNames,
         stars = c('*' = .1, '**' = .05, '***' = .01),
         add_rows = new_rows,
         gof_map = list(list('raw' = 'nobs', 'clean' = 'Observations', 'fmt' = f),
                        list('raw' = 'adj.r.squared', 'clean' = 'Adj. R$^2$', 'fmt' = '%.3f')),
         notes = "Standard errors clustered by country. Independent variables lagged by 1 time period.",
         escape = FALSE,
         output = here('output', 'tables', 'table-b2.tex')
)


# Figure B2 ---------------------------------------------------------------

# Uses estimates from Model 4, Table B2 above)
cme_data <- plot_cme(tb2_4, 
                     variables = "dem_lag1", 
                     condition = "top1Share_interp_lag1",
                     draw = FALSE)
ggplot(cme_data,
       aes(x = top1Share_interp_lag1, y = estimate)) +
  geom_hline(yintercept = 0, color = 'red', size = .5, linetype = 'dashed') +
  geom_line() +
  geom_ribbon(aes(ymin = conf.low, 
                  ymax = conf.high), alpha = .2) +
  labs(x = 'Top 1% Income Share',
       y = 'Marginal Effect of Democracy on CLI') +
  theme_minimal(base_size = 14) +
  theme(legend.position = 'none',
        panel.grid.minor.x = element_blank(),
        panel.grid.minor.y = element_blank())

ggsave(here('output', 'figures', 'figure-b2.pdf'),
       width = 6, height = 4, units = 'in')


# Table B3 ----------------------------------------------------------------

## Data ----
df_t1 <- haven::read_dta(here('data', 'cli_p3.dta'))

## Analyses ---------------------------------------------------------------

tb3_1 <- feols(p3_CLI_DV ~  dem_lag1 * gini_disp_lag1 | 
                 code_cowc + p3,
               data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

tb3_2 <- feols(p3_CLI_DV ~  dem_lag1 * gini_disp_lag1 +
                 gdp_lag1_ln +
                 gdppc_lag1_ln | 
                 code_cowc + p3,
               data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

tb3_3 <- feols(p3_CLI_DV ~  dem_lag1 * gini_disp_lag1 +
                 gdp_lag1_ln +
                 gdppc_lag1_ln +
                 trade_openness_lag1 | 
                 code_cowc + p3,
               data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

tb3_4 <- feols(p3_CLI_DV ~  dem_lag1 * gini_disp_lag1 +
                 gdp_lag1_ln +
                 gdppc_lag1_ln +
                 trade_openness_lag1 +
                 crisis_3_percent_lag1 | 
                 code_cowc + p3,
               data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])


## Generate Table B3 ------------------------------------------------------

coefNames <- c("dem_lag1" = "Democracy",
               "gini_disp_lag1" = "Gini\\textsubscript{post-transfer}",
               "dem_lag1:gini_disp_lag1" = "Gini\\textsubscript{post-transfer} $\\times$ Democracy",
               "gdp_lag1_ln" = "ln GDP",
               "gdppc_lag1_ln" = "ln GDP per cap.",
               "trade_openness_lag1" = "Trade Openness",
               "crisis_3_percent_lag1" = "Economic Crisis"
)

models <- list("(1)" = tb3_1, 
               "(2)" = tb3_2, 
               "(3)" = tb3_3, 
               "(4)" = tb3_4)

new_rows <- data.frame(matrix(c("Period Length", rep("3 years", length(models)),
                                "Country \\& Period FE?", rep("Yes", length(models)),
                                "Countries", getGroups(models)),
                              ncol = length(models) + 1, byrow = TRUE))

attr(new_rows, 'position') <- c((length(coefNames) * 2) + 1:3)

msummary(models,
         coef_map = coefNames,
         stars = c('*' = .1, '**' = .05, '***' = .01),
         add_rows = new_rows,
         gof_map = list(list('raw' = 'nobs', 'clean' = 'Observations', 'fmt' = f),
                        list('raw' = 'adj.r.squared', 'clean' = 'Adj. R$^2$', 'fmt' = '%.3f')),
         notes = "Standard errors clustered by country. Independent variables lagged by 1 time period.",
         escape = FALSE,
         output = here('output', 'tables', 'table-b3.tex')
)


# Table B4 ----------------------------------------------------------------

## Data ----
df_t1 <- haven::read_dta(here('data', 'cli_p3.dta'))

setDT(df_t1)
setorder(df_t1, code_cowc, p3)
df_t1[ , polity_lag1 := shift(x = polity, n = 1L, fill = NA, type = 'lag'), by = code_cowc]


## Analyses ---------------------------------------------------------------

tb4_1 <- feols(p3_CLI_DV ~  polity_lag1 | code_cowc + p3,
               data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

tb4_2 <- feols(p3_CLI_DV ~  polity_lag1 * top1Share_interp_lag1_ln | 
                 code_cowc + p3,
               data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

tb4_3 <- feols(p3_CLI_DV ~  polity_lag1 * top1Share_interp_lag1_ln +
                 gdp_lag1_ln +
                 gdppc_lag1_ln | 
                 code_cowc + p3,
               data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

tb4_4 <- feols(p3_CLI_DV ~  polity_lag1 * top1Share_interp_lag1_ln +
                 gdp_lag1_ln +
                 gdppc_lag1_ln +
                 trade_openness_lag1 | 
                 code_cowc + p3,
               data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

tb4_5 <- feols(p3_CLI_DV ~  polity_lag1 * top1Share_interp_lag1_ln +
                 gdp_lag1_ln +
                 gdppc_lag1_ln +
                 trade_openness_lag1 +
                 crisis_3_percent_lag1 | 
                 code_cowc + p3,
               data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])


## Gen. Table B4 ----------------------------------------------------------

coefNames <- c("polity_lag1" = "Polity",
               "top1Share_interp_lag1_ln" = "ln Top 1\\% Income Share",
               "polity_lag1:top1Share_interp_lag1_ln" = "ln Top 1\\% Income Share $\times$ Polity",
               "gdp_lag1_ln" = "ln GDP",
               "gdppc_lag1_ln" = "ln GDP per cap.",
               "trade_openness_lag1" = "Trade Openness",
               "crisis_3_percent_lag1" = "Economic Crisis"
)

models <- list("(1)" = tb4_1, 
               "(2)" = tb4_2, 
               "(3)" = tb4_3, 
               "(4)" = tb4_4, 
               "(5)" = tb4_5)

new_rows <- data.frame(matrix(c("Period Length", rep("3 years", length(models)),
                                "Country \\& Period FE?", rep("Yes", length(models)),
                                "Countries", getGroups(models)),
                              ncol = length(models) + 1, byrow = TRUE))

attr(new_rows, 'position') <- c((length(coefNames) * 2) + 1:3)

msummary(models,
         coef_map = coefNames,
         stars = c('*' = .1, '**' = .05, '***' = .01),
         add_rows = new_rows,
         gof_map = list(list('raw' = 'nobs', 'clean' = 'Observations', 'fmt' = f),
                        list('raw' = 'adj.r.squared', 'clean' = 'Adj. R$^2$', 'fmt' = '%.3f')),
         notes = "Standard errors clustered by country. Independent variables lagged by 1 time period.",
         escape = FALSE,
         output = here('output', 'tables', 'table-b4.tex')
)

# Table B5 ----------------------------------------------------------------

## Data ----
df_t1 <- haven::read_dta(here('data', 'cli_p3.dta'))

## Analyses ---------------------------------------------------------------


tb5_1 <- feols(p3_CLI_DV ~  polyarchy_lag1 * top1Share_interp_lag1_ln | 
                 code_cowc + p3,
               data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

tb5_2 <- feols(p3_CLI_DV ~  polyarchy_lag1 * top1Share_interp_lag1_ln +
                 gdp_lag1_ln +
                 gdppc_lag1_ln | 
                 code_cowc + p3,
               data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

tb5_3 <- feols(p3_CLI_DV ~  polyarchy_lag1 * top1Share_interp_lag1_ln +
                 gdp_lag1_ln +
                 gdppc_lag1_ln +
                 trade_openness_lag1 | 
                 code_cowc + p3,
               data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

tb5_4 <- feols(p3_CLI_DV ~  polyarchy_lag1 * top1Share_interp_lag1_ln +
                 gdp_lag1_ln +
                 gdppc_lag1_ln +
                 trade_openness_lag1 +
                 crisis_3_percent_lag1 | 
                 code_cowc + p3,
               data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])


## Gen. Table B5 ----------------------------------------------------------

coefNames <- c("polyarchy_lag1" = "Polyarchy",
               "top1Share_interp_lag1_ln" = "ln Top 1\\% Income Share",
               "polyarchy_lag1:top1Share_interp_lag1_ln" = "ln Top 1\\% Income Share $\\times$ Polyarchy",
               "gdp_lag1_ln" = "ln GDP",
               "gdppc_lag1_ln" = "ln GDP per cap.",
               "trade_openness_lag1" = "Trade Openness",
               "crisis_3_percent_lag1" = "Economic Crisis"
)

models <- list(tb5_1, tb5_2, tb5_3, tb5_4)
names(models) <- paste0("(", 1:length(models), ")")

new_rows <- data.frame(matrix(c("Period Length", rep("3 years", length(models)),
                                "Country \\& Period FE?", rep("Yes", length(models)),
                                "Countries", getGroups(models)),
                              ncol = length(models) + 1, byrow = TRUE))

attr(new_rows, 'position') <- c((length(coefNames) * 2) + 1:3)

msummary(models,
         coef_map = coefNames,
         stars = c('*' = .1, '**' = .05, '***' = .01),
         add_rows = new_rows,
         gof_map = list(list('raw' = 'nobs', 'clean' = 'Observations', 'fmt' = f),
                        list('raw' = 'adj.r.squared', 'clean' = 'Adj. R$^2$', 'fmt' = '%.3f')),
         notes = "Standard errors clustered by country. Independent variables lagged by 1 time period.",
         escape = FALSE,
         output = here('output', 'tables', 'table-b5.tex')
)


# Table D1 ----------------------------------------------------------------

## Data ----
df_t1 <- haven::read_dta(here('data', 'cli_p3.dta'))

## Analyses ---------------------------------------------------------------
td1_1 <- feols(p3_CLI_DV ~  dem_bmr_lag1 | code_cowc + p3,
            data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

td1_2 <- feols(p3_CLI_DV ~  dem_bmr_lag1 * top1Share_interp_lag1_ln | 
              code_cowc + p3,
            data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

td1_3 <- feols(p3_CLI_DV ~  dem_bmr_lag1 * top1Share_interp_lag1_ln +
              gdp_lag1_ln +
              gdppc_lag1_ln | 
              code_cowc + p3,
            data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

td1_4 <- feols(p3_CLI_DV ~  dem_bmr_lag1 * top1Share_interp_lag1_ln +
              gdp_lag1_ln +
              gdppc_lag1_ln +
              trade_openness_lag1 | 
              code_cowc + p3,
            data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

td1_5 <- feols(p3_CLI_DV ~  dem_bmr_lag1 * top1Share_interp_lag1_ln +
              gdp_lag1_ln +
              gdppc_lag1_ln +
              trade_openness_lag1 +
              crisis_3_percent_lag1 | 
              code_cowc + p3,
            data = df_t1[df_t1$p3 >= 21 & df_t1$top1_n >= 20, ])

## Gen. Table D1 -----

coefNames <- c("dem_bmr_lag1" = "Democracy\\textsubscript{BMR}",
               "top1Share_interp_lag1_ln" = "ln Top 1\\% Income Share",
               "dem_bmr_lag1:top1Share_interp_lag1_ln" = "ln Top 1\\% Income Share $\\times$ Democracy\\textsubscript{BMR}",
               "gdp_lag1_ln" = "ln GDP",
               "gdppc_lag1_ln" = "ln GDP per cap.",
               "trade_openness_lag1" = "Trade Openness",
               "crisis_3_percent_lag1" = "Economic Crisis"
)

models <- list(td1_1, td1_2, td1_3, td1_4, td1_5)
names(models) <- paste0("(", 1:length(models), ")")

new_rows <- data.frame(matrix(c("Period Length", rep("3 years", length(models)),
                                "Country \\& Period FE?", rep("Yes", length(models)),
                                "Countries", getGroups(models)),
                              ncol = length(models) + 1, byrow = TRUE))

attr(new_rows, 'position') <- c((length(coefNames) * 2) + 1:3)

msummary(models,
         coef_map = coefNames,
         stars = c('*' = .1, '**' = .05, '***' = .01),
         add_rows = new_rows,
         gof_map = list(list('raw' = 'nobs', 'clean' = 'Observations', 'fmt' = f),
                        list('raw' = 'adj.r.squared', 'clean' = 'Adj. R$^2$', 'fmt' = '%.3f')),
         notes = "Standard errors clustered by country. Independent variables lagged by 1 time period.",
         escape = FALSE,
         output = here('output', 'tables', 'table-d1.tex')
)


# Figure D1 ---------------------------------------------------------------

# Uses data processed for Figure 3

## Create subsets ---------------------------------------------------------

# Low inequality
df_f3$lowInequality_subset_drop_bmr <- 1

for (r in which((df_f3$top1Share_interp >= median(df_f3$top1Share_interp, na.rm = T) |
                 is.na(df_f3$top1Share_interp)) 
                & df_f3$dem_trans_bmr == 1)) {
  df_f3[ r:(r+10), lowInequality_subset_drop_bmr := 0]
}


# High inequality subsets

df_f3$highInequality_subset_drop_bmr <- 1

for (r in which((df_f3$top1Share_interp < median(df_f3$top1Share_interp, na.rm = T) |
                 is.na(df_f3$top1Share_interp)) 
                & df_f3$dem_trans_bmr == 1)) {
  df_f3[ r:(r+10), highInequality_subset_drop_bmr := 0]
}

data_bmr_low <- as.data.frame(df_f3[lowInequality_subset_drop_bmr == 1 & year >= 1945])
data_bmr_high <- as.data.frame(df_f3[highInequality_subset_drop_bmr == 1 & year >= 1945])

pm_lowInequality_bmr <- PanelMatch(lag = 5, lead = 0:10, 
                                   time.id = 'year', unit.id = 'cid', 
                                   treatment = 'democracy_bmr', 
                                   outcome.var = 'cli_overall_norm',
                                   refinement.method = 'CBPS.weight',
                                   covs.formula = ~ 
                                     lag(cli_overall_norm_lm3, 1) +
                                     lag(gdp_ln_lm3, 1) +
                                     lag(gdppc_ln_lm3, 1) +
                                     lag(trade_openness_lm3, 1) +
                                     lag(top1Share_interp_ln_lm3, 1),
                                   qoi = 'att', 
                                   forbid.treatment.reversal = FALSE,
                                   data = data_bmr_low)

pm_highInequality_bmr <- PanelMatch(lag = 5, lead = 0:10, 
                                    time.id = 'year', unit.id = 'cid', 
                                    treatment = 'democracy_bmr', 
                                    outcome.var = 'cli_overall_norm',
                                    refinement.method = 'CBPS.weight',
                                    covs.formula = ~ 
                                      lag(cli_overall_norm_lm3, 1) +
                                      lag(gdp_ln_lm3, 1) +
                                      lag(gdppc_ln_lm3, 1) +
                                      lag(trade_openness_lm3, 1) +
                                      lag(top1Share_interp_ln_lm3, 1),
                                    qoi = 'att', 
                                    forbid.treatment.reversal = FALSE,
                                    data = data_bmr_high)


## Plot -------------------------------------------------------------------

nicePMplot(pm_lowInequality_bmr, 
           data_bmr_low,
           yupper = .4, ylower = -0.24,
           seed = 1)

ggsave(here('output', 'figures', 'figure-d1a.pdf'),
       width = 6, height = 4, units = 'in')

nicePMplot(pm_highInequality_bmr, 
           data_bmr_high,
           yupper = .4, ylower = -0.24,
           seed = 1)

ggsave(here('output', 'figures', 'figure-d1b.pdf'),
       width = 6, height = 4, units = 'in')


# Figure E1 ----------------------------------------------------------------

# Done in stata. See figure-e1.do

# Figure F1 ----------------------------------------------------------------

## Data ----
df_t1 <- haven::read_dta(here('data', 'cli_p3.dta'))

## Analyses ---------------------------------------------------------------

m <- interflex(estimator = 'binning',
               Y = 'p3_CLI_DV',
               D = 'dem_lag1',
               X = 'top1Share_interp_lag1_ln',
               FE = c('code_cowc', 'p3'),
               Z = c('gdp_lag1_ln', 'gdppc_lag1_ln', 
                     'trade_openness_lag1', 'crisis_3_percent_lag1'),
               na.rm = TRUE,
               data = as.data.frame(df_t1[df_t1$dem_lag1 %in% 0:1, ]))

plot(m, 
     interval = c(1.5, 3.5),
     Ylabel = "CLI",
     Dlabel = "Democracy",
     Xlabel = "ln Top 1% Income Share",
     theme.bw = TRUE,
     show.grid = FALSE,
     cex.lab = .75,
     cex.axis = .75)

ggsave(here('output', 'figures', 'figure-f1.pdf'),
       width = 5, height = 4, units = 'in')

# Figure F2 ----------------------------------------------------------------

set.seed(94305)
m_kern <- interflex(estimator = 'kernel',
                    Y = 'p3_CLI_DV',
                    D = 'dem_lag1',
                    X = 'top1Share_interp_lag1_ln',
                    FE = c('code_cowc', 'p3'),
                    Z = c('gdp_lag1_ln', 'gdppc_lag1_ln', 
                          'trade_openness_lag1', 'crisis_3_percent_lag1'),
                    na.rm = TRUE,
                    data = as.data.frame(df_t1[df_t1$dem_lag1 %in% 0:1, ]))

plot(m_kern, 
     interval = c(1.5, 3.5),
     Ylabel = "CLI",
     Dlabel = "Democracy",
     Xlabel = "ln Top 1% Income Share",
     theme.bw = TRUE,
     show.grid = FALSE,
     cex.lab = .75,
     cex.axis = .75)

ggsave(here('output', 'figures', 'figure-f2a.pdf'),
       width = 5, height = 4, units = 'in')


# Figures G1 + G2 ------------------------------------------------------

# Uses the output from Figure 3

## Low Cov Bal ---- 
low_refined <- as.data.table(get_covariate_balance(pm_lowInequality$att,
                                                   covariates = c(
                                                     'cli_overall_norm_lm3',
                                                     'gdp_ln_lm3',
                                                     'gdppc_ln_lm3',
                                                     'trade_openness_lm3',
                                                     'top1Share_interp_ln_lm3'),
                                                   use.equal.weights = F,
                                                   data= data_polity_low,
                                                   plot = F))

low_unrefined <- as.data.table(get_covariate_balance(pm_lowInequality$att,
                                                     covariates =  c(
                                                       'cli_overall_norm_lm3',
                                                       'gdp_ln_lm3',
                                                       'gdppc_ln_lm3',
                                                       'trade_openness_lm3',
                                                       'top1Share_interp_ln_lm3'),
                                                     use.equal.weights = T,
                                                     data= data_polity_low,
                                                     plot = F))

low_refined <- melt(low_refined,
                    variable.name = 'variable',
                    value.name = 'sd',
                    measure.vars = colnames(low_refined))
low_refined$t <- rep(-5:0, 5)
low_refined$refined <- 'Refined'

low_unrefined <- melt(low_unrefined,
                      variable.name = 'variable',
                      value.name = 'sd',
                      measure.vars = colnames(low_unrefined))
low_unrefined$t <- rep(-5:0, 5)
low_unrefined$refined <- 'Unrefined'

plotData_low <- rbind(low_refined, low_unrefined)

plotData_low$refined_p <- factor(plotData_low$refined, levels = c('Unrefined', 'Refined'))

## High Cov Bal ----
high_refined <- as.data.table(get_covariate_balance(pm_highInequality$att,
                                                    covariates = c(
                                                      'cli_overall_norm_lm3',
                                                      'gdp_ln_lm3',
                                                      'gdppc_ln_lm3',
                                                      'trade_openness_lm3',
                                                      'top1Share_interp_ln_lm3'),
                                                    use.equal.weights = F,
                                                    data= data_polity_high,
                                                    plot = F))

high_unrefined <- as.data.table(get_covariate_balance(pm_highInequality$att,
                                                      covariates = c(
                                                        'cli_overall_norm_lm3',
                                                        'gdp_ln_lm3',
                                                        'gdppc_ln_lm3',
                                                        'trade_openness_lm3',
                                                        'top1Share_interp_ln_lm3'),
                                                      use.equal.weights = T,
                                                      data= data_polity_high,
                                                      plot = F))

high_refined <- melt(high_refined,
                     variable.name = 'variable',
                     value.name = 'sd',
                     measure.vars = colnames(high_refined))
high_refined$t <- rep(-5:0, 5)
high_refined$refined <- 'Refined'

high_unrefined <- melt(high_unrefined,
                       variable.name = 'variable',
                       value.name = 'sd',
                       measure.vars = colnames(high_unrefined))
high_unrefined$t <- rep(-5:0, 5)
high_unrefined$refined <- 'Unrefined'

plotData_high <- rbind(high_refined, high_unrefined)

plotData_high$refined_p <- factor(plotData_high$refined, levels = c('Unrefined', 'Refined'))

## Plot ---- 

ggplot(plotData_low, aes(y = sd, x = t, color = variable)) +
  geom_line() +
  geom_hline(yintercept = 0, linetype = 'dashed') +
  scale_color_discrete(labels=c('CLI', 
                                'GDP',
                                'GDP per cap.',
                                'Trade Dep.',
                                'Top 1%')) +
  scale_y_continuous(limits = c(-.5, 6.75),
                     breaks = 0:7) +
  facet_wrap(~refined_p) +
  labs(y = 'SD',
       x = 'Time to Democratization') +
  theme_cowplot() +
  theme(legend.position = 'bottom',
        legend.title = element_blank())

ggsave(here('output', 'figures', 'figure-g1.pdf'),
       height = 4, width = 7, units = 'in')

ggplot(plotData_high, aes(y = sd, x = t, color = variable)) +
  geom_line() +
  geom_hline(yintercept = 0, linetype = 'dashed') +
  scale_color_discrete(labels=c('CLI', 
                                'GDP',
                                'GDP per cap.',
                                'Trade Dep.',
                                'Top 1%')) +
  scale_y_continuous(limits = c(-.5, 6.75),
                     breaks = 0:7) +
  facet_wrap(~refined_p) +
  labs(y = 'SD',
       x = 'Time to Democratization') +
  theme_cowplot() +
  theme(legend.position = 'bottom',
        legend.title = element_blank())

ggsave(here('output', 'figures', 'figure-g2.pdf'),
       height = 4, width = 7, units = 'in')

